import os
import random


def generate_dataset(size, repeat_num):
    """Build a shuffled list of ``size`` integers that contains repeated values.

    The result holds every integer in ``range(size - repeat_num)`` exactly
    once, plus ``repeat_num`` additional values drawn with replacement from
    that same range, all in random order. When ``repeat_num == size`` there
    is no range left to draw from, so the result is ``size`` zeros.

    Args:
        size: Total number of elements in the returned list.
        repeat_num: Number of elements that are extra draws from the base
            range. Must satisfy ``0 <= repeat_num <= size``.

    Returns:
        A list of ``size`` ints.

    Raises:
        ValueError: If ``repeat_num`` is negative or greater than ``size``.
    """
    # Without this check a negative repeat_num silently yields more than
    # ``size`` elements, and repeat_num > size fails deep inside
    # random.choices with an unhelpful IndexError.
    if not 0 <= repeat_num <= size:
        raise ValueError(
            f'repeat_num must be between 0 and size ({size}), got {repeat_num}'
        )
    if size == repeat_num:
        return [0] * size
    distinct_count = size - repeat_num
    _dataset = list(range(distinct_count))
    _dataset += random.choices(range(distinct_count), k=repeat_num)
    random.shuffle(_dataset)
    return _dataset


def load_dataset(filename):
    """Read a dataset file that stores one integer per line.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ints in file order. Blank or whitespace-only lines are
        skipped.

    Raises:
        ValueError: If a non-blank line is not a valid integer literal.
    """
    with open(filename, encoding='utf-8') as _f:
        # int() tolerates surrounding whitespace but not an empty string,
        # so blank lines (e.g. a trailing empty line) are dropped first.
        return [int(line) for line in _f if line.strip()]


if __name__ == '__main__':
    # Write eleven dataset files; dataset-<i>.txt is generated with a repeat
    # count equal to i * 10 percent of the dataset size.
    dataset_size = 10 ** 6
    output_dir = 'datasets'
    # Create the output directory up front so open() does not fail with
    # FileNotFoundError when the script runs in a fresh working directory.
    os.makedirs(output_dir, exist_ok=True)
    for i in range(11):
        repeat_num = dataset_size * i * 10 // 100
        print(f'Generating dataset. size is {dataset_size}, repeat num is {repeat_num}')
        dataset = generate_dataset(dataset_size, repeat_num)
        # Open the file only once the data is ready, so a failed generation
        # does not leave an empty file behind.
        with open(os.path.join(output_dir, f'dataset-{i}.txt'), 'w', encoding='utf-8') as f:
            f.writelines(f'{each}\n' for each in dataset)
    print('Finished.')
